#delimit ;
clear all;

*** Calculates the share of covid deaths among total deaths by age group,
    pooled over all years up to and including 2021.
    NOTE(review): described as 2020 and 2021 - this assumes the workbook
    holds no yearly rows before 2020, please confirm ***;

* Close any log left open by an earlier run, capture ignores the error
  raised when no log is open;
capture log close;

log using Covid_death_age.log, replace;

import excel "Provisional_COVID-19_Deaths_by_Sex_and_Age.xlsx", sheet("Provisional_COVID-19_Deaths_by_") firstrow case(lower);

* Keep the yearly, national, all-sexes rows with year up to 2021;
keep if group=="By Year" & state=="United States" & sex=="All Sexes" & year<=2021;

* Drop the five age bands named below, every other agegroup value is kept;
keep if agegroup~="0-17 years" & agegroup~="18-29 years" & agegroup~="30-39 years" & agegroup~="40-49 years" & agegroup~="50-64 years";

* Build a reproducible numeric id for each age-group label.
  Numbering rows within year after sorting on year alone is unsafe because
  the order of tied observations is not defined, so the same id could point
  at different age groups in different years. Instead, remember the imported
  row order and rank each label by the first row on which it appears, which
  keeps the listing in spreadsheet order;
gen long roworder=_n;
bysort agegroup (roworder): gen long firstrow=roworder[1];
egen agegroup2=group(firstrow);

keep year covid19deaths totaldeaths agegroup agegroup2;

* Pool the years: sum deaths within each age group.
  Note that collapse (sum) treats missing counts as zero;
collapse (sum) covid19deaths totaldeaths (first) agegroup, by(agegroup2);

* Covid deaths as a percentage of all deaths in the age group;
gen share_covid=covid19deaths*100/totaldeaths;

list agegroup covid19deaths totaldeaths share_covid;
save covid_death_by_age_sum.dta, replace;

log close;